{
 "metadata": {
  "name": "",
  "signature": "sha256:6347f020d116705f98cd6b2926fde2e333bcbb20ddb1320af3181a63fc97ffdc"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "<img src=http://continuum.io/media/img/continuum_analytics_logo.png align=\"right\" width=\"30%\">\n",
      "# Playing with Spark\n",
      "\n",
      "This notebook builds on the HMDA dataset introduced in the following blog post: http://continuum.io/blog/blaze-hmda"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import blaze\n",
      "from blaze import Table, into"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 1
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "blaze.__version__"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 2,
       "text": [
        "'0.6.5'"
       ]
      }
     ],
     "prompt_number": 2
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### Load HMDA data from local Mongo database"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "hmda = Table('mongodb://localhost/db::hmda')\n",
      "hmda"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>action_taken_name</th>\n",
        "      <th>agency_abbr</th>\n",
        "      <th>applicant_ethnicity_name</th>\n",
        "      <th>applicant_race_name_1</th>\n",
        "      <th>applicant_sex_name</th>\n",
        "      <th>county_name</th>\n",
        "      <th>loan_purpose_name</th>\n",
        "      <th>state_abbr</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0 </th>\n",
        "      <td>                   Loan originated</td>\n",
        "      <td>  HUD</td>\n",
        "      <td> Not Hispanic or Latino</td>\n",
        "      <td>                     White</td>\n",
        "      <td>           Male</td>\n",
        "      <td>      Will County</td>\n",
        "      <td>      Refinancing</td>\n",
        "      <td> IL</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1 </th>\n",
        "      <td>                   Loan originated</td>\n",
        "      <td> NCUA</td>\n",
        "      <td> Not Hispanic or Latino</td>\n",
        "      <td>                     White</td>\n",
        "      <td>           Male</td>\n",
        "      <td>   Midland County</td>\n",
        "      <td>      Refinancing</td>\n",
        "      <td> MI</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2 </th>\n",
        "      <td> Loan purchased by the institution</td>\n",
        "      <td> CFPB</td>\n",
        "      <td>         Not applicable</td>\n",
        "      <td>            Not applicable</td>\n",
        "      <td> Not applicable</td>\n",
        "      <td>    Benton County</td>\n",
        "      <td>      Refinancing</td>\n",
        "      <td> AR</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3 </th>\n",
        "      <td> Loan purchased by the institution</td>\n",
        "      <td> CFPB</td>\n",
        "      <td> Not Hispanic or Latino</td>\n",
        "      <td>                     White</td>\n",
        "      <td>         Female</td>\n",
        "      <td>    Ramsey County</td>\n",
        "      <td>      Refinancing</td>\n",
        "      <td> MN</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4 </th>\n",
        "      <td>                   Loan originated</td>\n",
        "      <td> FDIC</td>\n",
        "      <td> Not Hispanic or Latino</td>\n",
        "      <td>                     White</td>\n",
        "      <td>           Male</td>\n",
        "      <td>     Allen County</td>\n",
        "      <td> Home improvement</td>\n",
        "      <td> IN</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5 </th>\n",
        "      <td>                   Loan originated</td>\n",
        "      <td>  HUD</td>\n",
        "      <td> Not Hispanic or Latino</td>\n",
        "      <td>                     White</td>\n",
        "      <td>           Male</td>\n",
        "      <td>      Cook County</td>\n",
        "      <td>      Refinancing</td>\n",
        "      <td> IL</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>6 </th>\n",
        "      <td>                   Loan originated</td>\n",
        "      <td>  HUD</td>\n",
        "      <td> Not Hispanic or Latino</td>\n",
        "      <td> Black or African American</td>\n",
        "      <td>           Male</td>\n",
        "      <td> Calcasieu Parish</td>\n",
        "      <td>    Home purchase</td>\n",
        "      <td> LA</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>7 </th>\n",
        "      <td>                   Loan originated</td>\n",
        "      <td>  HUD</td>\n",
        "      <td> Not Hispanic or Latino</td>\n",
        "      <td>                     White</td>\n",
        "      <td>           Male</td>\n",
        "      <td>     Grand County</td>\n",
        "      <td>      Refinancing</td>\n",
        "      <td> CO</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>8 </th>\n",
        "      <td>                   Loan originated</td>\n",
        "      <td> FDIC</td>\n",
        "      <td> Not Hispanic or Latino</td>\n",
        "      <td>                     White</td>\n",
        "      <td>         Female</td>\n",
        "      <td>     Allen County</td>\n",
        "      <td>      Refinancing</td>\n",
        "      <td> IN</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>9 </th>\n",
        "      <td>                   Loan originated</td>\n",
        "      <td> CFPB</td>\n",
        "      <td> Not Hispanic or Latino</td>\n",
        "      <td>                     White</td>\n",
        "      <td>           Male</td>\n",
        "      <td>    Talbot County</td>\n",
        "      <td>      Refinancing</td>\n",
        "      <td> MD</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>10</th>\n",
        "      <td>                   Loan originated</td>\n",
        "      <td>  HUD</td>\n",
        "      <td> Not Hispanic or Latino</td>\n",
        "      <td>                     White</td>\n",
        "      <td>           Male</td>\n",
        "      <td> Calcasieu Parish</td>\n",
        "      <td>    Home purchase</td>\n",
        "      <td> LA</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 3,
       "text": [
        "                    action_taken_name agency_abbr applicant_ethnicity_name  \\\n",
        "0                     Loan originated         HUD   Not Hispanic or Latino   \n",
        "1                     Loan originated        NCUA   Not Hispanic or Latino   \n",
        "2   Loan purchased by the institution        CFPB           Not applicable   \n",
        "3   Loan purchased by the institution        CFPB   Not Hispanic or Latino   \n",
        "4                     Loan originated        FDIC   Not Hispanic or Latino   \n",
        "5                     Loan originated         HUD   Not Hispanic or Latino   \n",
        "6                     Loan originated         HUD   Not Hispanic or Latino   \n",
        "7                     Loan originated         HUD   Not Hispanic or Latino   \n",
        "8                     Loan originated        FDIC   Not Hispanic or Latino   \n",
        "9                     Loan originated        CFPB   Not Hispanic or Latino   \n",
        "10                    Loan originated         HUD   Not Hispanic or Latino   \n",
        "\n",
        "        applicant_race_name_1 applicant_sex_name       county_name  \\\n",
        "0                       White               Male       Will County   \n",
        "1                       White               Male    Midland County   \n",
        "2              Not applicable     Not applicable     Benton County   \n",
        "3                       White             Female     Ramsey County   \n",
        "4                       White               Male      Allen County   \n",
        "5                       White               Male       Cook County   \n",
        "6   Black or African American               Male  Calcasieu Parish   \n",
        "7                       White               Male      Grand County   \n",
        "8                       White             Female      Allen County   \n",
        "9                       White               Male     Talbot County   \n",
        "10                      White               Male  Calcasieu Parish   \n",
        "\n",
        "   loan_purpose_name state_abbr  \n",
        "0        Refinancing         IL  \n",
        "1        Refinancing         MI  \n",
        "2        Refinancing         AR  \n",
        "3        Refinancing         MN  \n",
        "4   Home improvement         IN  \n",
        "5        Refinancing         IL  \n",
        "6      Home purchase         LA  \n",
        "7        Refinancing         CO  \n",
        "8        Refinancing         IN  \n",
        "9        Refinancing         MD  \n",
        "..."
       ]
      }
     ],
     "prompt_number": 3
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### Create a local PySpark instance\n",
      "\n",
      "Here the `SparkContext` runs locally, but it could just as well point to a cluster."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import pyspark\n",
      "sc = pyspark.SparkContext('local', 'test-app')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 4
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### Playing with PySpark\n",
      "\n",
      "1.  Move 10000 rows from Mongo to Spark\n",
      "2.  Find the possible actions taken (first column)\n",
      "3.  Put this result into a CSV file"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "rdd = into(sc, hmda.head(10000))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 5
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "rdd.take(3)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 6,
       "text": [
        "[(u'Loan originated',\n",
        "  u'HUD',\n",
        "  u'Not Hispanic or Latino',\n",
        "  u'White',\n",
        "  u'Male',\n",
        "  u'Will County',\n",
        "  u'Refinancing',\n",
        "  u'IL'),\n",
        " (u'Loan originated',\n",
        "  u'NCUA',\n",
        "  u'Not Hispanic or Latino',\n",
        "  u'White',\n",
        "  u'Male',\n",
        "  u'Midland County',\n",
        "  u'Refinancing',\n",
        "  u'MI'),\n",
        " (u'Loan purchased by the institution',\n",
        "  u'CFPB',\n",
        "  u'Not applicable',\n",
        "  u'Not applicable',\n",
        "  u'Not applicable',\n",
        "  u'Benton County',\n",
        "  u'Refinancing',\n",
        "  u'AR')]"
       ]
      }
     ],
     "prompt_number": 6
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Grab the distinct elements of the first column."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "rdd.map(lambda x: x[0]).distinct()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 7,
       "text": [
        "PythonRDD[7] at RDD at PythonRDD.scala:43"
       ]
      }
     ],
     "prompt_number": 7
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "That returned an RDD rather than the values themselves.  Let's bring the result into local memory with `.collect()`."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "rdd.map(lambda x: x[0]).distinct().collect()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 8,
       "text": [
        "[u'Loan originated',\n",
        " u'Application denied by financial institution',\n",
        " u'Application approved but not accepted',\n",
        " u'Loan purchased by the institution',\n",
        " u'Application withdrawn by applicant',\n",
        " u'File closed for incompleteness']"
       ]
      }
     ],
     "prompt_number": 8
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Now let's use `blaze.into` to write those results to a file."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "into('myfile.csv', rdd.map(lambda x: x[0]).distinct().collect())"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 9,
       "text": [
        "<blaze.data.csv.CSV at 0x7f8648810290>"
       ]
      }
     ],
     "prompt_number": 9
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!head myfile.csv"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Loan originated\r",
        "\r\n",
        "Application denied by financial institution\r",
        "\r\n",
        "Application approved but not accepted\r",
        "\r\n",
        "Loan purchased by the institution\r",
        "\r\n",
        "Application withdrawn by applicant\r",
        "\r\n",
        "File closed for incompleteness\r",
        "\r\n",
        "Loan originated\r",
        "\r\n",
        "Application denied by financial institution\r",
        "\r\n",
        "Application approved but not accepted\r",
        "\r\n",
        "Loan purchased by the institution\r",
        "\r\n"
       ]
      }
     ],
     "prompt_number": 10
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### Blaze drives PySpark\n",
      "\n",
      "We do the exact same work, but now driving with Blaze.  The computation is the same, only the interface is different.\n",
      "\n",
      "1.  Wrap a `Table` around the rdd\n",
      "2.  Find the possible actions taken\n",
      "3.  Put this result into a variety of containers (for example, a Python `list`)"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "t = Table(rdd, columns=hmda.columns)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 11
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "t"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>action_taken_name</th>\n",
        "      <th>agency_abbr</th>\n",
        "      <th>applicant_ethnicity_name</th>\n",
        "      <th>applicant_race_name_1</th>\n",
        "      <th>applicant_sex_name</th>\n",
        "      <th>county_name</th>\n",
        "      <th>loan_purpose_name</th>\n",
        "      <th>state_abbr</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0 </th>\n",
        "      <td>                   Loan originated</td>\n",
        "      <td>  HUD</td>\n",
        "      <td> Not Hispanic or Latino</td>\n",
        "      <td>                     White</td>\n",
        "      <td>           Male</td>\n",
        "      <td>      Will County</td>\n",
        "      <td>      Refinancing</td>\n",
        "      <td> IL</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1 </th>\n",
        "      <td>                   Loan originated</td>\n",
        "      <td> NCUA</td>\n",
        "      <td> Not Hispanic or Latino</td>\n",
        "      <td>                     White</td>\n",
        "      <td>           Male</td>\n",
        "      <td>   Midland County</td>\n",
        "      <td>      Refinancing</td>\n",
        "      <td> MI</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2 </th>\n",
        "      <td> Loan purchased by the institution</td>\n",
        "      <td> CFPB</td>\n",
        "      <td>         Not applicable</td>\n",
        "      <td>            Not applicable</td>\n",
        "      <td> Not applicable</td>\n",
        "      <td>    Benton County</td>\n",
        "      <td>      Refinancing</td>\n",
        "      <td> AR</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3 </th>\n",
        "      <td> Loan purchased by the institution</td>\n",
        "      <td> CFPB</td>\n",
        "      <td> Not Hispanic or Latino</td>\n",
        "      <td>                     White</td>\n",
        "      <td>         Female</td>\n",
        "      <td>    Ramsey County</td>\n",
        "      <td>      Refinancing</td>\n",
        "      <td> MN</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4 </th>\n",
        "      <td>                   Loan originated</td>\n",
        "      <td> FDIC</td>\n",
        "      <td> Not Hispanic or Latino</td>\n",
        "      <td>                     White</td>\n",
        "      <td>           Male</td>\n",
        "      <td>     Allen County</td>\n",
        "      <td> Home improvement</td>\n",
        "      <td> IN</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5 </th>\n",
        "      <td>                   Loan originated</td>\n",
        "      <td>  HUD</td>\n",
        "      <td> Not Hispanic or Latino</td>\n",
        "      <td>                     White</td>\n",
        "      <td>           Male</td>\n",
        "      <td>      Cook County</td>\n",
        "      <td>      Refinancing</td>\n",
        "      <td> IL</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>6 </th>\n",
        "      <td>                   Loan originated</td>\n",
        "      <td>  HUD</td>\n",
        "      <td> Not Hispanic or Latino</td>\n",
        "      <td> Black or African American</td>\n",
        "      <td>           Male</td>\n",
        "      <td> Calcasieu Parish</td>\n",
        "      <td>    Home purchase</td>\n",
        "      <td> LA</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>7 </th>\n",
        "      <td>                   Loan originated</td>\n",
        "      <td>  HUD</td>\n",
        "      <td> Not Hispanic or Latino</td>\n",
        "      <td>                     White</td>\n",
        "      <td>           Male</td>\n",
        "      <td>     Grand County</td>\n",
        "      <td>      Refinancing</td>\n",
        "      <td> CO</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>8 </th>\n",
        "      <td>                   Loan originated</td>\n",
        "      <td> FDIC</td>\n",
        "      <td> Not Hispanic or Latino</td>\n",
        "      <td>                     White</td>\n",
        "      <td>         Female</td>\n",
        "      <td>     Allen County</td>\n",
        "      <td>      Refinancing</td>\n",
        "      <td> IN</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>9 </th>\n",
        "      <td>                   Loan originated</td>\n",
        "      <td> CFPB</td>\n",
        "      <td> Not Hispanic or Latino</td>\n",
        "      <td>                     White</td>\n",
        "      <td>           Male</td>\n",
        "      <td>    Talbot County</td>\n",
        "      <td>      Refinancing</td>\n",
        "      <td> MD</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>10</th>\n",
        "      <td>                   Loan originated</td>\n",
        "      <td>  HUD</td>\n",
        "      <td> Not Hispanic or Latino</td>\n",
        "      <td>                     White</td>\n",
        "      <td>           Male</td>\n",
        "      <td> Calcasieu Parish</td>\n",
        "      <td>    Home purchase</td>\n",
        "      <td> LA</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 12,
       "text": [
        "                    action_taken_name agency_abbr applicant_ethnicity_name  \\\n",
        "0                     Loan originated         HUD   Not Hispanic or Latino   \n",
        "1                     Loan originated        NCUA   Not Hispanic or Latino   \n",
        "2   Loan purchased by the institution        CFPB           Not applicable   \n",
        "3   Loan purchased by the institution        CFPB   Not Hispanic or Latino   \n",
        "4                     Loan originated        FDIC   Not Hispanic or Latino   \n",
        "5                     Loan originated         HUD   Not Hispanic or Latino   \n",
        "6                     Loan originated         HUD   Not Hispanic or Latino   \n",
        "7                     Loan originated         HUD   Not Hispanic or Latino   \n",
        "8                     Loan originated        FDIC   Not Hispanic or Latino   \n",
        "9                     Loan originated        CFPB   Not Hispanic or Latino   \n",
        "10                    Loan originated         HUD   Not Hispanic or Latino   \n",
        "\n",
        "        applicant_race_name_1 applicant_sex_name       county_name  \\\n",
        "0                       White               Male       Will County   \n",
        "1                       White               Male    Midland County   \n",
        "2              Not applicable     Not applicable     Benton County   \n",
        "3                       White             Female     Ramsey County   \n",
        "4                       White               Male      Allen County   \n",
        "5                       White               Male       Cook County   \n",
        "6   Black or African American               Male  Calcasieu Parish   \n",
        "7                       White               Male      Grand County   \n",
        "8                       White             Female      Allen County   \n",
        "9                       White               Male     Talbot County   \n",
        "10                      White               Male  Calcasieu Parish   \n",
        "\n",
        "   loan_purpose_name state_abbr  \n",
        "0        Refinancing         IL  \n",
        "1        Refinancing         MI  \n",
        "2        Refinancing         AR  \n",
        "3        Refinancing         MN  \n",
        "4   Home improvement         IN  \n",
        "5        Refinancing         IL  \n",
        "6      Home purchase         LA  \n",
        "7        Refinancing         CO  \n",
        "8        Refinancing         IN  \n",
        "9        Refinancing         MD  \n",
        "..."
       ]
      }
     ],
     "prompt_number": 12
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "We can easily inspect the table, just like we would in pandas."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "t.action_taken_name"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>action_taken_name</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0 </th>\n",
        "      <td>                   Loan originated</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1 </th>\n",
        "      <td>                   Loan originated</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2 </th>\n",
        "      <td> Loan purchased by the institution</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3 </th>\n",
        "      <td> Loan purchased by the institution</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4 </th>\n",
        "      <td>                   Loan originated</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5 </th>\n",
        "      <td>                   Loan originated</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>6 </th>\n",
        "      <td>                   Loan originated</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>7 </th>\n",
        "      <td>                   Loan originated</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>8 </th>\n",
        "      <td>                   Loan originated</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>9 </th>\n",
        "      <td>                   Loan originated</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>10</th>\n",
        "      <td>                   Loan originated</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 13,
       "text": [
        "                    action_taken_name\n",
        "0                     Loan originated\n",
        "1                     Loan originated\n",
        "2   Loan purchased by the institution\n",
        "3   Loan purchased by the institution\n",
        "4                     Loan originated\n",
        "5                     Loan originated\n",
        "6                     Loan originated\n",
        "7                     Loan originated\n",
        "8                     Loan originated\n",
        "9                     Loan originated\n",
        "..."
       ]
      }
     ],
     "prompt_number": 13
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "All of the (meta)data movement is handled, giving the user a natural interactive experience."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "t.action_taken_name.distinct()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>action_taken_name</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td>                             Loan originated</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td> Application denied by financial institution</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td>       Application approved but not accepted</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
        "      <td>           Loan purchased by the institution</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
        "      <td>          Application withdrawn by applicant</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5</th>\n",
        "      <td>              File closed for incompleteness</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 14,
       "text": [
        "                             action_taken_name\n",
        "0                              Loan originated\n",
        "1  Application denied by financial institution\n",
        "2        Application approved but not accepted\n",
        "3            Loan purchased by the institution\n",
        "4           Application withdrawn by applicant\n",
        "5               File closed for incompleteness"
       ]
      }
     ],
     "prompt_number": 14
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "into(list, t.action_taken_name.distinct())"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 15,
       "text": [
        "[u'Loan originated',\n",
        " u'Application denied by financial institution',\n",
        " u'Application approved but not accepted',\n",
        " u'Loan purchased by the institution',\n",
        " u'Application withdrawn by applicant',\n",
        " u'File closed for incompleteness']"
       ]
      }
     ],
     "prompt_number": 15
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "## Main Points\n",
      "\n",
      "Blaze provides a lightweight wrapper around PySpark, giving a familiar interface to a powerful platform."
     ]
    }
   ],
   "metadata": {}
  }
 ]
}